


# Setup ------------------------------------------------------------------------
  # Options
  options(stringsAsFactors = F)
  # Packages
  library(pacman)
  p_load(lfe, rgdal, rgeos, data.table, magrittr)
  # Directories
  dir_project <- "/Users/edwardarubin/Dropbox/Research/MyProjects/NaturalGas/"
  dir_csv     <- dir_project %>% paste0("DataCsv/")
  dir_geozip  <- dir_csv %>% paste0("ZipCodes/GeoCodedCalifornia/")
  dir_rds     <- dir_project %>% paste0("DataR/")
  dir_results <- dir_rds %>% paste0("Results/Results20171024/")
  dir_spatial <- dir_project %>% paste0("DataSpatial/")
  dir_block   <- dir_spatial %>% paste0("CA_block_2010/")

# Load and merge zip code datasets ---------------------------------------------
  # Load 9-digit zip code counts
  zip_counts <- dir_results %>% paste0("zip9CommonCounts.rds") %>% readRDS()
  # Convert zip9 to integer
  zip_counts[, zip9 := as.integer(zip9)]
  # Load geocoded datasets
  geo_dt <- lapply(X = 1:5, FUN = function(i) {
    # Load the ith file
    tmp <- paste0(dir_geozip, "set", i, "_geocoded.csv") %>% fread()
    # Grab subset that intersects with our boundary dataset
    tmp <- tmp[zip9 %in% zip_counts$zip9, .(zip9, x = X, y = Y)]
    # Return
    return(tmp)
  }) %>% rbindlist()
  # Join the datasets
  geo_dt %<>% merge(zip_counts, by = "zip9", all.x = T, all.y = F)

# Load Census block group shapefile --------------------------------------------
  # Load it (takes some time)
  block_shp <- readOGR(dsn = dir_block, layer = "CA_block_2010")

# Find block group for each 9-digit zip code -----------------------------------
  # Load one of the
  # Create spatial points data frame
  geo_sp <- geo_dt
  coordinates(geo_sp) <- ~ x + y
  # Add (known from shapefile)
  proj4string(geo_sp) <- "+proj=longlat +datum=WGS84 +no_defs"
  # Re-project to match Census shapefile
  geo_sp <- spTransform(x = geo_sp, CRSobj = proj4string(block_shp))
  # Locate blocks (polygon) for each 9-digit zip code (point)
  geo_over <- sp::over(x = geo_sp, y = block_shp, returnList = F)
  geo_over %<>% data.table()
  # Bind the block information onto the zip code data table
  geo_dt %<>% cbind(geo_over[, .(
    STATEFP10, COUNTYFP10, TRACTCE10, BLOCKCE10, NAME10, GEOID10, GISJOIN)])

# Load Census block data -------------------------------------------------------
  # Grab descriptions
  block_desc <- fread(paste0(dir_csv, "NHGIS/nhgis_2010_block.csv"),
    nrows = 1)
  # Grab data without descriptions
  block_dt <- fread(paste0(dir_csv, "NHGIS/nhgis_2010_block.csv"),
    header = F, skip = 2, col.names = names(block_desc))
  # Grab and name desired columns
  block_dt <- block_dt[, .(
    GISJOIN,
    # NAME10 = NAME,
    urban_rural = URBRURALA,
    pop_total = H7V001,
    pop_f = H76026,
    pop_m = H76002,
    pop_white = H7X002,
    pop_black = H7X003,
    pop_sub18 = H76003 + H76004 + H76005 + H76006 +
      H76027 + H76028 + H76029 + H76030,
    n_hh = H8C001,
    n_hh_family = H8C002,
    n_families = H8T001,
    n_units = IFC001,
    n_occupied = IFE002,
    n_mortgage = IFF002,
    n_owned = IFF003,
    n_rented = IFF004,
    n_for_sale = IFG004,
    NULL
  )]

# Merge datasets ---------------------------------------------------------------
  # Merge by GEOID
  zip_dt <- merge(x = geo_dt, y = block_dt,
    by = "GISJOIN", all.x = T, all.y = F)

# Add a few variables ----------------------------------------------------------
  # Utility
  zip_dt[n_pge > 0, utility := "pge"]
  zip_dt[n_socal > 0, utility := "socal"]
  zip_dt[n_pge > 0 & n_socal > 0, utility := "both"]
  # Representation in our data
  zip_dt[, n_data := n_pge + n_socal]
  # Load the zip-city map and restrict to California; drop state
  zip_city <- fread(paste0(dir_csv, "zipCodeDatabase.csv"),
    select = c("zip", "state", "primary_city"))[state == "CA"][, state := NULL]
  setnames(zip_city, c("zip", "city"))
  zip_city[, zip := as.character(zip)]
  # Join city data onto zip_dt
  zip_dt %<>% merge(y = zip_city, by = "zip", all.x = T, all.y = F)

# Create Census-block level dataset --------------------------------------------
  # Create a block-level dataset
  b_dt <- copy(zip_dt)
  # Aggregate to the Census block
  b_dt <- b_dt[, .(
    n_pge      = sum(n_pge),
    n_socal    = sum(n_socal),
    x          = mean(x),
    y          = mean(y),
    zip        = first(zip),
    city       = first(city),
    urban_rural= first(urban_rural),
    pop_total  = sum(pop_total),
    pop_f      = sum(pop_f),
    pop_m      = sum(pop_m),
    pop_white  = sum(pop_white),
    pop_black  = sum(pop_black),
    pop_sub18  = sum(pop_sub18),
    n_hh       = sum(n_hh),
    n_hh_family= sum(n_hh_family),
    n_families = sum(n_families),
    n_units    = sum(n_units),
    n_occupied = sum(n_occupied),
    n_mortgage = sum(n_mortgage),
    n_owned    = sum(n_owned),
    n_rented   = sum(n_rented),
    n_for_sale = sum(n_for_sale),
    n_data     = sum(n_data)
  ), by = GEOID10]
  # Create utility variable
  b_dt[n_pge > 0, utility := "pge"]
  b_dt[n_socal > 0, utility := "socal"]
  b_dt[n_pge > 0 & n_socal > 0, utility := "both"]

# Drop geographies served by both utilities ------------------------------------
  # 9-digit zip codes
  zip_dt <- zip_dt[utility != "both"]
  # Blocks
  b_dt <- b_dt[utility != "both"]

# Test balance -----------------------------------------------------------------

  # Population
  felm(pop_total ~ utility | 0 | 0 | 0,
    weights = b_dt[,n_data],
    data = b_dt, exactDOF = T) %>% summary()

  # Percent female
  felm(I(pop_f/pop_total) ~ utility | 0 | 0 | 0,
    weights = b_dt[,n_data],
    data = b_dt) %>% summary()

  # Percent white
  felm(I(pop_white/pop_total) ~ utility | 0 | 0 | 0,
    weights = b_dt[,n_data],
    data = b_dt) %>% summary()
  # Percent white
  felm(I(pop_white/pop_total) ~ utility | city | 0 | 0,
    weights = b_dt[,n_data],
    data = b_dt) %>% summary()
  # Percent white
  felm(I(pop_white/pop_total) ~ utility | zip | 0 | 0,
    weights = b_dt[,n_data],
    data = b_dt) %>% summary()

  # Percentage of population under 18
  felm(I(pop_sub18/pop_total) ~ utility | 0 | 0 | 0,
    weights = b_dt[,n_data],
    data = b_dt) %>% summary()

  # Longitude
  felm(x ~ utility | 0 | 0 | 0,
    weights = b_dt[,n_data],
    data = b_dt) %>% summary()

  # Latitude
  felm(y ~ utility | 0 | 0 | 0,
    weights = b_dt[,n_data],
    data = b_dt) %>% summary()

  # Urban
  felm(I(urban_rural == "U") ~ utility | 0 | 0 | 0,
    weights = b_dt[,n_data],
    data = b_dt) %>% summary()

  # Number of households
  felm(n_hh ~ utility | 0 | 0 | 0,
    weights = b_dt[,n_data],
    data = b_dt) %>% summary()

  # Number of family households
  felm(n_hh_family ~ utility | 0 | 0 | 0,
    weights = b_dt[,n_data],
    data = b_dt) %>% summary()

  # Number of housing units
  felm(n_units ~ utility | 0 | 0 | 0,
    weights = b_dt[,n_data],
    data = b_dt) %>% summary()

  # Share of housing units are occupied
  felm(I(n_occupied/n_units) ~ utility | 0 | 0 | 0,
    weights = b_dt[,n_data],
    data = b_dt) %>% summary()

  # Share of housing units that are mortgaged or loaned
  felm(I(n_mortgage/n_units) ~ utility | 0 | 0 | 0,
    weights = b_dt[,n_data],
    data = b_dt) %>% summary()

  # Share of housing units that are rented
  felm(I(n_rented/n_units) ~ utility | 0 | 0 | 0,
    weights = b_dt[,n_data],
    data = b_dt) %>% summary()
  # Share of housing units that are rented
  felm(I(n_rented/n_units) ~ utility | city | 0 | 0,
    weights = b_dt[,n_data],
    data = b_dt) %>% summary()
  # Share of housing units that are rented
  felm(I(n_rented/n_units) ~ utility | zip | 0 | 0,
    weights = b_dt[,n_data],
    data = b_dt) %>% summary()

  # Share of housing units that are owned
  felm(I(n_owned/n_units) ~ utility | 0 | 0 | 0,
    weights = b_dt[,n_data],
    data = b_dt) %>% summary()

  # Share of housing units that are for sale
  felm(I(n_for_sale/n_units) ~ utility | 0 | 0 | 0,
    weights = b_dt[,n_data],
    data = b_dt) %>% summary()
